/*LIS Cross-National Data Center in Luxembourg*/

/*email: usersupport@lisdatacenter.org*/

/*LIS Self Teaching Package 2022*/
/*Part I: Inequality, poverty, and social policy*/
/*SAS version*/

/*last change of this version of the syntax: 15-01-2022*/

/*Exercise 6: Comparing income concepts*/

/* Session settings: quiet log, plain left-aligned listing, widest page. */
OPTIONS
    NONOTES NOSOURCE            /* no notes and no source echo in the log   */
    NOFMTERR                    /* missing formats are not treated as errors */
    NODATE NONUMBER NOCENTER    /* no date, no page number, left-aligned     */
    LABEL                       /* procedures may print variable labels      */
    LS=MAX PS=MAX ;             /* maximum line size and page size           */
TITLE "" ;
/**-----------------------------**/
/** PART I: DEFINE SUB-ROUTINES **/ 
/**-----------------------------**/
/*
 * Gini: weighted Gini coefficient of one variable.
 *
 * Inputs (macro variables that must be set by the caller before the call):
 *   dataset - name of the data set to analyse
 *   var     - variable the coefficient is computed for
 *   wgt     - weight variable
 *
 * Side effects:
 *   - the data set named by dataset is sorted in place by var
 *   - data set Gini (one row, one variable gini) is created or replaced
 *   - PROC MEANS prints the mean of gini
 *
 * Method: the first pass accumulates the weight total and the weighted total
 * of var; the second pass walks the sorted rows, builds cumulative shares in
 * percent and sums (share of weight in this row) * (cumulative share of var
 * at this row + at the previous row). gini = 1 - sum / 10000.
 */
%MACRO Gini ;
	PROC SORT DATA=&dataset ;
	  BY &var ;
	RUN ;
	DATA Gini (KEEP=gini) ;
	    /* cumulative shares reached by the previous row, start at zero */
	    RETAIN prewt 0 preey 0 ;

	    /* pass 1 (first iteration only): grand totals over the whole data set */
	    IF _N_ = 1 THEN
	        DO UNTIL (last) ;
	            SET &dataset END=last ;
	            swt   + &wgt ;
	            swtey + (&wgt * &var) ;
	        END ;

	    /* pass 2: read the same data set again, one row per iteration.   */
	    /* This SET has to name the data set used in pass 1, otherwise the */
	    /* totals and the cumulative sums come from different data.        */
	    SET &dataset END=eof ;

	    cwt   + &wgt ;
	    cwtey + (&var * &wgt) ;
	    pcwt   = cwt   / swt   * 100 ;
	    pcwtey = cwtey / swtey * 100 ;

	    /* contribution of this row: width times sum of the two heights */
	    up = (pcwt - prewt) * (pcwtey + preey) ;
	    sum + up ;

	    prewt = pcwt ;
	    preey = pcwtey ;

	    /* shares are in percent, hence the division by 100 * 100 */
	    IF eof THEN
	        DO ;
	            gini = 1 - (sum / 10000) ;
	            OUTPUT ;
	        END ;
	RUN ;
	PROC MEANS DATA=Gini MEAN ;
	RUN ;
%MEND Gini ;
/*
 * equival: for every variable named in the macro variable incTyp, add to
 * data set current a cleaned, top and bottom coded, size-adjusted copy
 * named e<variable>.
 *
 * Inputs:
 *   incTyp  - blank-separated list of variable names (any length)
 *   current - data set that holds those variables plus did, hpopwgt, nhhmem
 *
 * For each listed variable x the macro
 *   1. copies x to ex, deletes rows where ex is missing, sets ex to 0
 *      where x is negative;
 *   2. builds logx = LOG(ex), using 0 where ex is 0;
 *   3. takes the hpopwgt-weighted 25th and 75th percentile of logx;
 *   4. caps ex at EXP(q75 + 3 * iqr) and floors it at EXP(q25 - 3 * iqr);
 *   5. divides ex by SQRT(nhhmem).
 *
 * Side effects: current is rewritten and re-sorted by did and logx on every
 * pass; helper columns logx, iqr, upper_bound and lower_bound stay in
 * current; work data set temp is replaced.
 */
%MACRO equival ;
 /* keep loop state and the quartiles out of the global symbol table */
 %LOCAL i tmpv b t ;
 %LET i    = 1 ;
 %LET tmpv = %SCAN(&incTyp, &i, %STR( )) ;
 /* loop over every name in the list instead of a fixed count of four */
 %DO %WHILE (%LENGTH(&tmpv) > 0) ;
	DATA current ;
	 SET current ;
		e&tmpv = &tmpv ;
		IF e&tmpv = . THEN DELETE ;
		IF (&tmpv < 0) THEN e&tmpv = 0 ;
		/* LOG is undefined at 0: use 0 there without calling LOG */
		IF e&tmpv > 0 THEN log&tmpv = LOG(e&tmpv) ;
		ELSE log&tmpv = 0 ;
	RUN ;
	/* NOTE(review): PROC UNIVARIATE below has no BY statement, so this */
	/* sort only fixes the row order of current - confirm it is needed. */
	PROC SORT DATA=current ;
	  BY did log&tmpv ;
	RUN ;
	PROC UNIVARIATE DATA=current NOPRINT ;
	  VAR log&tmpv ;
	  WEIGHT hpopwgt ;
	  OUTPUT OUT=temp P25=q25 P75=q75 ;
	RUN ;
	/* SYMPUTX converts the number with a wider field than SYMPUT and   */
	/* strips blanks, so the quartiles keep their precision.             */
	DATA _NULL_ ;
	  SET temp ;
	  CALL SYMPUTX("b", q25, "L") ;
	  CALL SYMPUTX("t", q75, "L") ;
	RUN ;
	DATA current ;
	 SET current ;
		/* parentheses keep a negative quartile from fusing with the operator */
		iqr         = (&t) - (&b) ;
		upper_bound = (&t) + (iqr * 3) ;
		lower_bound = (&b) - (iqr * 3) ;
		IF e&tmpv > EXP(upper_bound) THEN e&tmpv = EXP(upper_bound) ;
		IF e&tmpv < EXP(lower_bound) THEN e&tmpv = EXP(lower_bound) ;
		e&tmpv = e&tmpv / SQRT(nhhmem) ;
	RUN ;
	%LET i    = %EVAL(&i + 1) ;
	%LET tmpv = %SCAN(&incTyp, &i, %STR( )) ;
 %END ;
%MEND equival ;

/*
 * indic: for every variable named in the macro variable incTyp, print the
 * Gini coefficient and the weighted share of rows below the poverty line.
 *
 * Inputs:
 *   incTyp  - blank-separated list of variable names (any length)
 *   current - data set that holds those variables plus ipwgt and povlin
 *
 * For the j-th name in the list the macro
 *   - sets the macro variables dataset, var and wgt and calls the Gini
 *     macro (which sorts current by that variable and prints the result);
 *   - adds poor<j> to current: 1 where the variable is below povlin, else 0;
 *   - prints the ipwgt-weighted frequency table of poor<j>.
 */
%MACRO indic ;
 /* loop state stays local to this macro */
 %LOCAL j etmpv ;
 %LET j     = 1 ;
 %LET etmpv = %SCAN(&incTyp, &j, %STR( )) ;
 /* loop over every name in the list instead of a fixed count of four */
 %DO %WHILE (%LENGTH(&etmpv) > 0) ;
	/* parameters read by the Gini macro */
	%LET dataset = current ;
	%LET var     = &etmpv ;
	%LET wgt     = ipwgt ;
	%gini ;
	DATA current ;
	 SET current ;
		/* comparison yields 1 when true and 0 when false */
		poor&j = (&etmpv < povlin) ;
	RUN ;
	PROC FREQ DATA=current ;
	   TABLES poor&j ;
	   WEIGHT ipwgt ;
	RUN ;
	%LET j     = %EVAL(&j + 1) ;
	%LET etmpv = %SCAN(&incTyp, &j, %STR( )) ;
 %END ;
%MEND indic ;

/**-----------------------------------**/
/**  PART II: RUN THE MAIN PROGRAMME  **/ 
/**-----------------------------------**/
/* Build the working file: keep the needed variables, drop rows with a  */
/* missing income component and add the weight ipwgt = hpopwgt * nhhmem. */
/* NOTE(review): hifactor enters mi, siti and sa further down but is not */
/* part of this completeness check - confirm that this is intended.      */
DATA current ;
    SET &gt06h (KEEP=dhi hifactor hpublic hpub_i hpub_u hpub_a hi33 hiprivate hxitsc hpopwgt nhhmem grossnet did) ;
    /* 1 when any listed component is missing, otherwise 0 */
    miss_comp = (   dhi       = .
                 OR hpub_i    = .
                 OR hpub_u    = .
                 OR hpub_a    = .
                 OR hi33      = .
                 OR hiprivate = .
                 OR hxitsc    = . ) ;
    IF miss_comp THEN DELETE ;
    ipwgt = hpopwgt * nhhmem ;
RUN ;

/* Descriptive statistics of the public transfer variables on the raw file */
PROC MEANS DATA=&gt06h ;
    VAR hpublic hpub_i hpub_u hpub_a ;
RUN ;

/* Income concepts compared in this exercise. SUM() skips missing arguments. */
/*   dhitb : dhi as delivered                                                */
/*   mi    : hifactor + hiprivate + hi33                                     */
/*   siti  : mi + hpub_i + hpub_u - hxitsc                                   */
/*   sa    : mi + hpub_a                                                     */
DATA current ;
    SET current ;
    dhitb = dhi ;
    mi    = SUM(hifactor, hiprivate, hi33) ;
    siti  = SUM(hifactor, hiprivate, hi33, hpub_i, hpub_u, -hxitsc) ;
    sa    = SUM(hifactor, hiprivate, hi33, hpub_a) ;
RUN ;

/* variables handed to the equival macro */
%LET incTyp = mi siti sa dhitb ;
%equival


/* Poverty line: half of the ipwgt-weighted median of edhitb. */
PROC MEANS DATA=current NOPRINT ;
  VAR edhitb ;
  WEIGHT ipwgt ;
  OUTPUT OUT=temp MEDIAN=mededhi ;
RUN ;
/* SYMPUTX converts the number with a wider field than SYMPUT and strips */
/* blanks, so the median is not shortened before it is used below.        */
DATA _NULL_ ;
  SET temp ;
  CALL SYMPUTX("m", mededhi) ;
RUN ;
DATA current ;
  SET current ;
  povlin = (&m) * 0.5 ;
RUN ;

/* variables handed to the indic macro: the e-prefixed versions from equival */
%LET incTyp = emi esiti esa edhitb ;
%indic
